#### SET UP ####

rm(list = ls())

pcks <- c("dplyr", "tidyr", "stringr", "stargazer", "sandwich", "lmtest", "reshape", "reshape2", "scales", "xtable")

## Install if not there
for(p in pcks){
  if(!p %in% installed.packages()[,"Package"]){
    print(p)
    install.packages(p)
  }
}
rm(pcks)

library(here)
library(dplyr)
library(tidyr)
library(reshape2)
library(reshape)
library(stringr)
library(stargazer)
library(sandwich)
library(lmtest)
library(scales)
library(xtable)


### original variables for each round and game ## 

list_ca1 <- paste("collective_action01.", 1:5, ".player.action_choice", sep = "")
list_ca2 <- paste("collective_action02.", 1:5, ".player.action_choice", sep = "")
list_ca3 <- paste("collective_action03.", 1:5, ".player.action_choice", sep = "")
list_ca4 <- paste("collective_action04.", 1:5, ".player.action_choice", sep = "")
list_ca5 <- paste("collective_action05.", 1:5, ".player.action_choice", sep = "")

list_rd1 <- paste("collective_action0", 1:5, ".1.player.action_choice", sep = "")
list_rd2 <- paste("collective_action0", 1:5, ".2.player.action_choice", sep = "")
list_rd3 <- paste("collective_action0", 1:5, ".3.player.action_choice", sep = "")
list_rd4 <- paste("collective_action0", 1:5, ".4.player.action_choice", sep = "")
list_rd5 <- paste("collective_action0", 1:5, ".5.player.action_choice", sep = "")

list_cost2 <- paste("collective_action02.", 1:5, ".group.punish_cost", sep = "")
list_cost3 <- paste("collective_action03.", 1:5, ".group.punish_cost", sep = "")
list_cost4 <- paste("collective_action04.", 1:5, ".group.punish_cost", sep = "")
list_cost5 <- paste("collective_action05.", 1:5, ".group.punish_cost", sep = "")

################# Pilot Data Set-up ###############

data_pilot <- read.csv("cheung-siegel-2023_pilot_data.csv")

# Data cleaning code 

data_pilot <- data_pilot %>%
  filter(participant._current_app_name == "demographics") %>%
  select(-c(participant._is_bot, participant._index_in_pages, participant._max_page_index, participant._current_app_name, participant._current_page_name, participant.visited, participant.mturk_worker_id, participant.mturk_assignment_id, session.label, session.experimenter_name, session.mturk_HITId, session.mturk_HITGroupId, session.comment, session.is_demo, session.config.participation_fee, consent.1.player.id_in_group, consent.1.player.consent, consent.1.player.is_dropout, consent.1.player.payoff, consent.1.group.id_in_subsession, consent.1.subsession.round_number)) %>%
  mutate(test_lab.code = ifelse(session.code == "5r651efz", 1, 
                                ifelse(session.code == "sa409hae", 2, 
                                       ifelse(session.code == "hct2xzhh", 3, 
                                              ifelse(session.code == "vmrwp9l4" | session.code == "jm9ojm9l", 4, 
                                                     ifelse(session.code == "r30a5kwo", 5, 
                                                            ifelse(session.code == "spv9ir1o", 6, 
                                                                   ifelse(session.code == "ghkak51o", 7, 
                                                                          ifelse(session.code == "pa8hdsso", 8, 
                                                                                 ifelse(session.code == "ix3hurx0", 9, ""))))))))))

##### Individual Level Data

data_pilot.1 <- data_pilot  %>%
  group_by(session.code, collective_action01.1.player.cohort) %>% 
  mutate(group_id = cur_group_id()) %>%
  mutate(treatment = ifelse(!is.na(collective_action02.3.group.group_contribution), 1, 2)) %>%
  dplyr:: rename(cohort = collective_action01.1.player.cohort)

## Convert wide data to long data with participant's action_choice as the primary variable

data_long_pilot.action <- 
  reshape2::melt(data_pilot.1, 
                 id.vars = c("participant.code", "participant.label", "participant.payoff", "group_id", "session.code", "cohort", "treatment", "test_lab.code"), 
                 measure.vars=c(list_ca1, list_ca2, list_ca3, list_ca4, list_ca5), 
                 variable.name = "ca_round", 
                 value.name = "action_choice")

## Convert wide data to long data with randomly allocated punishment cost levels as the primary variable

data_long_pilot.punish <- 
  reshape2::melt(data_pilot.1, 
                 id.vars = c("participant.code", "participant.label", "participant.payoff", "group_id", "session.code", "cohort", "treatment", "test_lab.code"), 
                 measure.vars=c(list_cost2, list_cost3, list_cost4, list_cost5), 
                 variable.name = "ca_round", 
                 value.name = "punish_cost")

# Renaming "ca_round" variable  

## Define a function to convert a single level to a new format
convert_level <- function(old_level) {
  parts <- unlist(strsplit(old_level, "[.]"))
  parts2 <- str_sub(parts[1], -1, -1)
  new_level <- paste0("ca", parts2, "_", parts[2])
  return(new_level)
}

## Check existing levels in data_long_pilot2 and create new_levels
existing_levels_action <- levels(data_long_pilot.action$ca_round)
new_levels_action <- sapply(existing_levels_action, convert_level)

existing_levels_punish <- levels(data_long_pilot.punish$ca_round)
new_levels_punish <- sapply(existing_levels_punish, convert_level)

## Update levels of data
data_long_pilot.action2 <- data_long_pilot.action %>%
  mutate(ca_round = factor(ca_round, levels = existing_levels_action, labels = new_levels_action))

data_long_pilot.punish2 <- data_long_pilot.punish %>%
  mutate(ca_round = factor(ca_round, levels = existing_levels_punish, labels = new_levels_punish))


## Merge long action_choice data and punish_cost data

data_long_pilot.merged <- merge(data_long_pilot.action2, data_long_pilot.punish2, by = c("participant.code", "participant.label", "ca_round", "treatment", "participant.payoff", "session.code", "group_id", "cohort", "test_lab.code"), all.x = FALSE, all.y = TRUE) 


## Adding other variables 

data_long_pilot.merged <- data_long_pilot.merged %>%
  mutate(punish_risk = ifelse(ca_round %in% c("ca1_1", "ca1_2", "ca1_3", "ca1_4", "ca1_5"), 0, 
                              ifelse(ca_round %in% c("ca2_1", "ca3_1", "ca4_1", "ca5_1"), 0.25, 
                                     ifelse(ca_round %in% c("ca2_2", "ca2_4", "ca3_2", "ca3_4", "ca4_2", "ca4_4", "ca5_2", "ca5_4"), 0.5, 
                                            ifelse(ca_round %in% c("ca2_3", "ca2_5", "ca3_3", "ca3_5", "ca4_3", "ca4_5", "ca5_3", "ca5_5"), 0.75, NA))))) %>%
  mutate(immunity_prob = ifelse(ca_round %in% c("ca3_1", "ca3_4", "ca5_1", "ca5_4"), 0.5, 
                                ifelse(ca_round %in% c("ca3_2", "ca3_3", "ca3_5", "ca5_2", "ca5_3", "ca5_5"), 0.33, NA))) %>%
  mutate(sub_treatment = 
           ifelse(ca_round %in% c("ca1_1", "ca1_2", "ca1_3", "ca1_4", "ca1_5"), "ca1", 
                  ifelse(ca_round %in% c("ca2_1", "ca2_2", "ca2_3", "ca2_4", "ca2_5"), "ca2", 
                         ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5"), "ca3", 
                                ifelse(ca_round %in% c("ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5"), "ca4", 
                                       ifelse(ca_round %in% c("ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), "ca5", NA)))))) %>%
  mutate(immunity_treatment = 
           ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5", "ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), 1, 0))

  
### Group Data 

list_grp1 <- paste("collective_action01.", 1:5, ".group.group_contribution", sep = "")
list_grp2 <- paste("collective_action02.", 1:5, ".group.group_contribution", sep = "")
list_grp3 <- paste("collective_action03.", 1:5, ".group.group_contribution", sep = "")
list_grp4 <- paste("collective_action04.", 1:5, ".group.group_contribution", sep = "")
list_grp5 <- paste("collective_action05.", 1:5, ".group.group_contribution", sep = "")
#list_ca_all <- paste("collective_action0", 1:5, ".", 1:5, ".player.action_choice", sep = "")

data_pilot.2 <- data_pilot.1 %>% 
  group_by(group_id) %>%
  slice_head()

data_long_pilotgroup.contribution <- 
  reshape2::melt(data_pilot.2, 
                 id.vars = c("group_id", "session.code", "cohort", "treatment", "test_lab.code"), 
                 measure.vars=c(list_grp1, list_grp2, list_grp3, list_grp4, list_grp5), 
                 variable.name = "ca_round", 
                 value.name = "group_contribution"
  )

data_long_pilotgroup.cost <- 
  reshape2::melt(data_pilot.2, 
                 id.vars = c("group_id", "session.code", "cohort", "treatment", "test_lab.code"), 
                 measure.vars=c(list_cost2, list_cost3, list_cost4, list_cost5), 
                 variable.name = "ca_round", 
                 value.name = "punish_cost"
  )

## Check existing levels in data_long_pilot2 and create new_levels
existing_levels_g.action <- levels(data_long_pilotgroup.contribution$ca_round)
new_levels_g.action <- sapply(existing_levels_g.action, convert_level)

existing_levels_g.punish <- levels(data_long_pilotgroup.cost$ca_round)
new_levels_g.punish <- sapply(existing_levels_g.punish, convert_level)

## Update levels of data
data_long_pilotgroup.contribution2 <- data_long_pilotgroup.contribution %>%
  mutate(ca_round = factor(ca_round, levels = existing_levels_g.action, labels = new_levels_g.action))

data_long_pilotgroup.cost2 <- data_long_pilotgroup.cost %>%
  mutate(ca_round = factor(ca_round, levels = existing_levels_g.punish, labels = new_levels_g.punish))

data_long_pilot.g.merged <- merge(data_long_pilotgroup.contribution2, data_long_pilotgroup.cost2, by = c("ca_round", "treatment", "session.code", "group_id", "cohort", "test_lab.code"), all.x = FALSE, all.y = FALSE) 


data_long_pilot.g.merged <- data_long_pilot.g.merged %>%
  mutate(punish_risk = ifelse(ca_round %in% c("ca1_1", "ca1_2", "ca1_3", "ca1_4", "ca1_5"), 0, 
                              ifelse(ca_round %in% c("ca2_1", "ca3_1", "ca4_1", "ca5_1"), 0.25, 
                                     ifelse(ca_round %in% c("ca2_2", "ca2_4", "ca3_2", "ca3_4", "ca4_2", "ca4_4", "ca5_2", "ca5_4"), 0.5, 
                                            ifelse(ca_round %in% c("ca2_3", "ca2_5", "ca3_3", "ca3_5", "ca4_3", "ca4_5", "ca5_3", "ca5_5"), 0.75, NA))))) %>%
  mutate(immunity_prob = ifelse(ca_round %in% c("ca3_1", "ca3_4", "ca5_1", "ca5_4"), 0.5, 
                                ifelse(ca_round %in% c("ca3_2", "ca3_3", "ca3_5", "ca5_2", "ca5_3", "ca5_5"), 0.33, NA))) %>%
  mutate(sub_treatment = 
           ifelse(ca_round %in% c("ca1_1", "ca1_2", "ca1_3", "ca1_4", "ca1_5"), "ca1", 
                  ifelse(ca_round %in% c("ca2_1", "ca2_2", "ca2_3", "ca2_4", "ca2_5"), "ca2", 
                         ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5"), "ca3", 
                                ifelse(ca_round %in% c("ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5"), "ca4", 
                                       ifelse(ca_round %in% c("ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), "ca5", NA)))))) %>%
  mutate(immunity_treatment = 
           ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5", "ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), 1, 0))



### Debug sample

data_long_pilot.merged2 <- data_long_pilot.merged %>% 
  filter(!(test_lab.code == 1 & ca_round %in% c("ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5"))) %>%
  filter(!(test_lab.code == 2 & ca_round %in% c("ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5"))) %>% 
  filter(!(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5", "ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"))) 
  

data_long_pilot.g.merged2 <- data_long_pilot.g.merged  %>% 
  filter(!(test_lab.code == 1 & ca_round %in% c("ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5"))) %>%
  filter(!(test_lab.code == 2 & ca_round %in% c("ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5"))) %>% 
  filter(!(ca_round %in% c("ca1_1", "ca1_2", "ca1_3", "ca1_4", "ca1_5", "ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5", "ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"))) # %>%

## Selecting just punishment phases

data_long_pilot.merged3 <- data_long_pilot.merged2 %>%
  filter(str_detect(ca_round, pattern = "ca2|ca4"))

data_long_pilot.g.merged3 <- data_long_pilot.g.merged2 %>%
  filter(str_detect(ca_round, pattern = "ca2|ca4"))

########## Main Data Set-up ################

data_main <- read.csv("cheung-siegel-2023_final_data.csv")

data_main <- data_main %>% 
  filter(participant._current_app_name == "demographics") %>%
  select(-c(participant._is_bot, participant._index_in_pages, participant._max_page_index, participant._current_app_name, participant._current_page_name, participant.visited, participant.mturk_worker_id, participant.mturk_assignment_id, session.label, session.mturk_HITId, session.mturk_HITGroupId, session.comment, session.is_demo, session.config.participation_fee, consent.1.player.id_in_group, consent.1.player.consent, consent.1.player.is_dropout, consent.1.player.payoff, consent.1.group.id_in_subsession, consent.1.subsession.round_number)) %>%
  mutate(test_lab.code = ifelse(session.code == "bxla8kod", 14, 
                                ifelse(session.code == "ty225eto", 15, 
                                       ifelse(session.code == "1e99hjf5", 12, 
                                              ifelse(session.code == "s21ztdl5", 13, 
                                                     ifelse(session.code == "n6dp0ozb", 10, 
                                                            ifelse(session.code == "7qznp870", 11, 
                                                                   ifelse(session.code == "j8lofo1u", 20,
                                                                          ifelse(session.code == "wmira38a", 21,
                                                                                 ifelse(session.code == "eq1oecel", 16,
                                                                                        ifelse(session.code == "ksn8rycf", 17,
                                                                                               ifelse(session.code == "72a7yyfm", 18,
                                                                                                      ifelse(session.code == "3j7k00jd", 19,
                                                                                                             ifelse(session.code == "5mqbnx53", 22,
                                                                                                                    ifelse(session.code == "2lqwwgr1", 23,
                                                                                                                           ifelse(session.code == "52xbs1c1", 24,
                                                                                                                                  ifelse(session.code == "l8cdo526", 29,
                                                                                                                                         " ")))))))))))))))))


# Individual Data Set-up

data_main.1 <- data_main  %>%
  group_by(session.code, collective_action01.1.player.cohort) %>% 
  mutate(group_id = cur_group_id()) %>%
  dplyr:: rename(cohort = collective_action01.1.player.cohort) %>%
  dplyr:: rename(treatment = collective_action01.1.group.group_treatment) %>% 
  mutate(group_size = collective_action01.1.player.gsize)


data_main.indiv.action <- 
  reshape2::melt(data_main.1, 
                 id.vars = c("participant.code", "participant.label", "participant.payoff", "group_id", "session.code", "cohort", "treatment", "test_lab.code", "group_size"), 
                 measure.vars=c(list_ca1, list_ca2, list_ca3, list_ca4, list_ca5), 
                 variable.name = "ca_round", 
                 value.name = "action_choice")

## Update levels of data
existing_levels_main.action <- levels(data_main.indiv.action$ca_round)
new_levels_main.action <- sapply(existing_levels_main.action, convert_level)
data_main.indiv.action <- data_main.indiv.action %>%
  mutate(ca_round = factor(ca_round, levels = existing_levels_main.action, labels = new_levels_main.action))



data_main.indiv.action <- data_main.indiv.action %>%
  mutate(punish_risk = ifelse(ca_round %in% c("ca1_1", "ca1_2", "ca1_3", "ca1_4", "ca1_5"), 0, 
                              ifelse(ca_round %in% c("ca2_1", "ca3_1", "ca4_1", "ca5_1"), 0.25, 
                                     ifelse(ca_round %in% c("ca2_2", "ca2_4", "ca3_2", "ca3_4", "ca4_2", "ca4_4", "ca5_2", "ca5_4"), 0.5, 
                                            ifelse(ca_round %in% c("ca2_3", "ca2_5", "ca3_3", "ca3_5", "ca4_3", "ca4_5", "ca5_3", "ca5_5"), 0.75, NA))))) %>%
  mutate(punish_cost = ifelse(ca_round %in% c("ca2_1", "ca2_2", "ca2_3", "ca2_4", "ca2_5", "ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5",  "ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5", "ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), 1.5, NA)) %>%
  mutate(immunity_prob = ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca5_1", "ca5_2", "ca5_3"), 0.25, 
                                ifelse(ca_round %in% c("ca3_4", "ca3_5", "ca5_4", "ca5_5"), 0.5, NA))) %>%
  mutate(sub_treatment = 
           ifelse(ca_round %in% c("ca1_1", "ca1_2", "ca1_3", "ca1_4", "ca1_5"), "ca1", 
                  ifelse(ca_round %in% c("ca2_1", "ca2_2", "ca2_3", "ca2_4", "ca2_5"), "ca2", 
                         ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5"), "ca3", 
                                ifelse(ca_round %in% c("ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5"), "ca4", 
                                       ifelse(ca_round %in% c("ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), "ca5", NA)))))) %>%
  mutate(immunity_treatment = 
           ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5", "ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), 1, 0))

# Group Data Set-up

data_main.2 <- data_main.1 %>% 
  group_by(group_id) %>%
  slice_head()


data_main.group.action <- 
  reshape2::melt(data_main.2, 
                 id.vars = c("group_id", "session.code", "cohort", "treatment", "test_lab.code", "group_size"), 
                 measure.vars=c(list_grp1, list_grp2, list_grp3, list_grp4, list_grp5), 
                 variable.name = "ca_round", 
                 value.name = "group_contribution"
  )

## Update levels of data
existing_levels_main.gaction <- levels(data_main.group.action$ca_round)
new_levels_main.gaction <- sapply(existing_levels_main.gaction, convert_level)
data_main.group.action <- data_main.group.action %>%
  mutate(ca_round = factor(ca_round, levels = existing_levels_main.gaction, labels = new_levels_main.gaction))

data_main.group.action <- data_main.group.action %>% 
  mutate(punish_risk = ifelse(ca_round %in% c("ca1_1", "ca1_2", "ca1_3", "ca1_4", "ca1_5"), 0, 
                              ifelse(ca_round %in% c("ca2_1", "ca3_1", "ca4_1", "ca5_1"), 0.25, 
                                     ifelse(ca_round %in% c("ca2_2", "ca2_4", "ca3_2", "ca3_4", "ca4_2", "ca4_4", "ca5_2", "ca5_4"), 0.5, 
                                            ifelse(ca_round %in% c("ca2_3", "ca2_5", "ca3_3", "ca3_5", "ca4_3", "ca4_5", "ca5_3", "ca5_5"), 0.75, NA))))) %>%
  mutate(punish_cost = ifelse(ca_round %in% c("ca2_1", "ca2_2", "ca2_3", "ca2_4", "ca2_5", "ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5",  "ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5", "ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), 1.5, NA)) %>%
  mutate(immunity_prob = ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca5_1", "ca5_2", "ca5_3"), 0.25, 
                                ifelse(ca_round %in% c("ca3_4", "ca3_5", "ca5_4", "ca5_5"), 0.5, NA))) %>%
  mutate(sub_treatment = 
           ifelse(ca_round %in% c("ca1_1", "ca1_2", "ca1_3", "ca1_4", "ca1_5"), "ca1", 
                  ifelse(ca_round %in% c("ca2_1", "ca2_2", "ca2_3", "ca2_4", "ca2_5"), "ca2", 
                         ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5"), "ca3", 
                                ifelse(ca_round %in% c("ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5"), "ca4", 
                                       ifelse(ca_round %in% c("ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), "ca5", NA)))))) %>%
  mutate(immunity_treatment = 
           ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5", "ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), 1, 0)) %>%
  mutate(period = 
           ifelse(ca_round %in% c("ca1_1", "ca2_1", "ca3_1", "ca4_1", "ca5_1"), 1, 
                  ifelse(ca_round %in% c("ca1_2", "ca2_2", "ca3_2", "ca4_2", "ca5_2"), 2, 
                         ifelse(ca_round %in% c("ca1_3", "ca2_3", "ca3_3", "ca4_3", "ca5_3"), 3, 
                                ifelse(ca_round %in% c("ca1_4", "ca2_4", "ca3_4", "ca4_4", "ca5_4"), 4, 
                                       ifelse(ca_round %in% c("ca1_5", "ca2_5", "ca3_5", "ca4_5", "ca5_5"), 5, NA))))))

data_subset_group.immune <- data_main.group.action %>%
  ungroup() %>%
  filter(str_detect(ca_round, pattern = "ca3") | str_detect(ca_round, pattern = "ca5")) 

############ Immunity Data 

list_ca1c <- paste("collective_action01.", 1:5, ".player.immunity", sep = "")
list_ca2c <- paste("collective_action02.", 1:5, ".player.immunity", sep = "")
list_ca3c <- paste("collective_action03.", 1:5, ".player.immunity", sep = "")
list_ca4c <- paste("collective_action04.", 1:5, ".player.immunity", sep = "")
list_ca5c <- paste("collective_action05.", 1:5, ".player.immunity", sep = "")


data_main.indiv.immune <- 
  reshape2::melt(data_main.1, 
                 id.vars = c("participant.code", "participant.label", "participant.payoff", "group_id", "session.code", "cohort", "treatment", "test_lab.code", "group_size"), 
                 measure.vars=c(list_ca3c, list_ca5c), 
                 variable.name = "ca_round", 
                 value.name = "player_immunity")

## Update levels of data
existing_levels_main.immune <- levels(data_main.indiv.immune$ca_round)
new_levels_main.immune <- sapply(existing_levels_main.immune, convert_level)
data_main.indiv.immune <- data_main.indiv.immune %>%
  mutate(ca_round = factor(ca_round, levels = existing_levels_main.immune, labels = new_levels_main.immune))

data_long_main.merged <- merge(data_main.indiv.immune, data_main.indiv.action, by = c("participant.code", "participant.label", "participant.payoff", "session.code", "group_id", "cohort", "test_lab.code","ca_round", "treatment", "group_size"), all.x = TRUE)


data_long_main.merged <- data_long_main.merged %>%
  filter(!is.na(action_choice)) %>%
  mutate(p_immunity = replace_na(player_immunity, 0))

####  Risk Controls - RISK SURVEY + HOLT LAURY

data_main.1$kam_q1 <- rescale(data_main.1$kam_survey.1.player.q1)
data_main.1$kam_q2 <- 1 - rescale(as.numeric(factor(data_main.1$kam_survey.2.player.q2, levels = c("Definitely continue playing", "Probably continue playing", "Not sure", "Probably take my winnings","Definitely take my winnings"))))
data_main.1$kam_q3 <- rescale(as.numeric(factor(data_main.1$kam_survey.3.player.q3, levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"))))
data_main.1$kam_q4 <- rescale(as.numeric(factor(data_main.1$kam_survey.4.player.q4, levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"))))
data_main.1$kam_q5 <- rescale(as.numeric(factor(data_main.1$kam_survey.5.player.q5, levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"))))
data_main.1$kam_q6 <- rescale(as.numeric(factor(data_main.1$kam_survey.6.player.q6, levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"))))
data_main.1$kam_q7 <- rescale(as.numeric(factor(data_main.1$kam_survey.7.player.q7, levels = c("Difficult", "Somewhat Difficult", "Somewhat Easy", "Very Easy"))))


data_main.3 <- data_main.1 %>% 
  mutate(riskscale = (kam_q1 + kam_q2 + kam_q3 + kam_q4 + kam_q5 + kam_q6 + kam_q7)/7) %>%
  mutate_at(c("holt_and_laury.1.player.lottery_1", "holt_and_laury.1.player.lottery_2", "holt_and_laury.1.player.lottery_3", "holt_and_laury.1.player.lottery_4", "holt_and_laury.1.player.lottery_5", "holt_and_laury.1.player.lottery_6", "holt_and_laury.1.player.lottery_7", "holt_and_laury.1.player.lottery_8", "holt_and_laury.1.player.lottery_9", "holt_and_laury.1.player.lottery_10"), ~ifelse(. == "Option A", 1, 0)) %>%
  mutate(holt_sum = holt_and_laury.1.player.lottery_1 + holt_and_laury.1.player.lottery_2 + holt_and_laury.1.player.lottery_3 + holt_and_laury.1.player.lottery_4 + holt_and_laury.1.player.lottery_5 + holt_and_laury.1.player.lottery_6 + holt_and_laury.1.player.lottery_7 + holt_and_laury.1.player.lottery_8 + holt_and_laury.1.player.lottery_9 + holt_and_laury.1.player.lottery_10)


data_long_main.controls <- 
  reshape2::melt(data_main.3, 
                 id.vars = c("participant.code", "participant.label", "participant.payoff", "group_id", "session.code", "cohort", "treatment", "test_lab.code", "riskscale", "holt_sum", "group_size"), 
                 measure.vars=c(list_ca1, list_ca2, list_ca3, list_ca4, list_ca5), 
                 variable.name = "ca_round", 
                 value.name = "action_choice")

existing_levels_main.controls <- levels(data_long_main.controls$ca_round)
new_levels_main.controls <- sapply(existing_levels_main.controls, convert_level)
data_main.indiv.controls <- data_long_main.controls %>%
  mutate(ca_round = factor(ca_round, levels = existing_levels_main.controls, labels = new_levels_main.controls))

data_main.indiv.controls <- data_main.indiv.controls %>%
  mutate(punish_risk = ifelse(ca_round %in% c("ca1_1", "ca1_2", "ca1_3", "ca1_4", "ca1_5"), 0, 
                              ifelse(ca_round %in% c("ca2_1", "ca3_1", "ca4_1", "ca5_1"), 0.25, 
                                     ifelse(ca_round %in% c("ca2_2", "ca2_4", "ca3_2", "ca3_4", "ca4_2", "ca4_4", "ca5_2", "ca5_4"), 0.5, 
                                            ifelse(ca_round %in% c("ca2_3", "ca2_5", "ca3_3", "ca3_5", "ca4_3", "ca4_5", "ca5_3", "ca5_5"), 0.75, NA))))) %>%
  mutate(punish_cost = ifelse(ca_round %in% c("ca2_1", "ca2_2", "ca2_3", "ca2_4", "ca2_5", "ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5",  "ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5", "ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), 1.5, NA)) %>%
  mutate(immunity_prob = ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca5_1", "ca5_2", "ca5_3"), 0.25, 
                                ifelse(ca_round %in% c("ca3_4", "ca3_5", "ca5_4", "ca5_5"), 0.5, NA))) %>%
  mutate(sub_treatment = 
           ifelse(ca_round %in% c("ca1_1", "ca1_2", "ca1_3", "ca1_4", "ca1_5"), "ca1", 
                  ifelse(ca_round %in% c("ca2_1", "ca2_2", "ca2_3", "ca2_4", "ca2_5"), "ca2", 
                         ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5"), "ca3", 
                                ifelse(ca_round %in% c("ca4_1", "ca4_2", "ca4_3", "ca4_4", "ca4_5"), "ca4", 
                                       ifelse(ca_round %in% c("ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), "ca5", NA)))))) %>%
  mutate(immunity_treatment = 
           ifelse(ca_round %in% c("ca3_1", "ca3_2", "ca3_3", "ca3_4", "ca3_5", "ca5_1", "ca5_2", "ca5_3", "ca5_4", "ca5_5"), 1, 0))


data_long_main.merged2 <- merge(data_long_main.merged, data_main.indiv.controls, by = c("participant.code", "participant.label","participant.payoff", "session.code", "group_id", "cohort", "test_lab.code", "treatment", "action_choice", "ca_round", "immunity_treatment", "immunity_prob", "punish_risk", "punish_cost", "sub_treatment", "group_size"), all.y = TRUE ) %>%
  mutate(period = 
           ifelse(ca_round %in% c("ca1_1", "ca2_1", "ca3_1", "ca4_1", "ca5_1"), 1, 
                  ifelse(ca_round %in% c("ca1_2", "ca2_2", "ca3_2", "ca4_2", "ca5_2"), 2, 
                         ifelse(ca_round %in% c("ca1_3", "ca2_3", "ca3_3", "ca4_3", "ca5_3"), 3, 
                                ifelse(ca_round %in% c("ca1_4", "ca2_4", "ca3_4", "ca4_4", "ca5_4"), 4, 
                                       ifelse(ca_round %in% c("ca1_5", "ca2_5", "ca3_5", "ca4_5", "ca5_5"), 5, NA))))))

data_long_main.punishonly <- data_long_main.merged2 %>%
  filter(str_detect(ca_round, pattern = "ca2") | str_detect(ca_round, pattern = "ca4")) 

data_long_group.punishonly <- data_main.group.action  %>%
  filter(str_detect(ca_round, pattern = "ca2") | str_detect(ca_round, pattern = "ca4")) 

############ Table 4: Pilot Results ###################

model_pilot.m1 <- glm(action_choice ~ punish_risk + punish_cost + treatment, data = data_long_pilot.merged3, family = "binomial"(link = "probit"))
model_pilot.m1_participants <- coeftest(model_pilot.m1, vcovCL, type='HC0', cluster=~participant.code)


model_pilot.m2 <- glm(group_contribution ~ punish_risk + punish_cost + treatment, data  = data_long_pilot.g.merged3, family = "binomial"(link = "probit"))
model_pilot.m2_group <- coeftest(model_pilot.m2, vcovCL, type='HC0', cluster=~group_id)

star <- stargazer(model_pilot.m1, model_pilot.m2,
                  title = "Pilot Results: Punishment Risks + Costs", 
                  covariate.labels = c("Punish Risk","Punish Cost", "Info Treatment"), 
                  se = list(model_pilot.m1_participants[,"Std. Error"], model_pilot.m2_group[,"Std. Error"]), 
                  column.labels = c("Indiv Action", "Group Outcome"),
                  single.row = TRUE)

note.latex <- "\\multicolumn{3}{l} {\textit{Notes:} {$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01}}\\ 
              \\multicolumn{3}{l}{Only games from the second 'punishment' phase of the game are included.} \\"
star[grepl("Note",star)] <- note.latex
cat (star, sep = "\n")


############ Table 5: Lab results (Punishment Risks + Group Size) ###################


m1 <- glm(action_choice ~ punish_risk + treatment + group_size , data = data_long_main.merged2, family = "binomial"(link = "probit"))
m1.participants <- coeftest(m1, vcovCL, type='HC0', cluster=~participant.code)
m2 <- glm(action_choice ~ punish_risk + treatment + holt_sum + riskscale + group_size , data = data_long_main.merged2, family = "binomial"(link = "probit"))
m2.participants <- coeftest(m2, vcovCL, type='HC0', cluster=~participant.code)
m3 <- glm(action_choice ~ punish_risk + treatment + group_size, data = data_long_main.punishonly, family = "binomial"(link = "probit"))
m3.participants <- coeftest(m3, vcovCL, type='HC0', cluster=~participant.code)

g1 <- glm(group_contribution ~ punish_risk + treatment + group_size , data = data_main.group.action, family = "binomial"(link = "probit"))
g1.group <- coeftest(g1, vcovCL, type='HC0', cluster=~group_id)
g2 <- glm(group_contribution ~ punish_risk + treatment + group_size , data = data_long_group.punishonly, family = "binomial"(link = "probit"))
g2.group <- coeftest(g2, vcovCL, type='HC0', cluster=~group_id)


stargazer(m1,m2,m3,g1,g2,
          title = "Lab 10 - 29 Results: Punishment Risks + Group Size",
          covariate.labels = c("Punish Risk", "Info Treatment", "Safe Choices", "Kam Risk", "Group Size"), 
          dep.var.labels = c("Indiv Action", "Group Outcome"), 
          column.labels = c("Basic Model", "+ Controls", "Punishment Phase Only", "Basic Model", "Punishment Phase Only"),
          se = list(m1.participants[,"Std. Error"], m2.participants[,"Std. Error"], m3.participants[,"Std. Error"], g1.group[,"Std. Error"], g2.group[,"Std. Error"]),
          notes.append = FALSE,
          single.row = TRUE
)

############ Table 6: Lab results (Immunity) ###################


data_subset_immune.n <- data_long_main.merged2 %>%
  ungroup() %>%
  filter(str_detect(ca_round, pattern = "ca3") | str_detect(ca_round, pattern = "ca5")) %>%
  filter(p_immunity == 0)

data_subset_immune.y <- data_long_main.merged2 %>%
  ungroup() %>%
  filter(str_detect(ca_round, pattern = "ca3") | str_detect(ca_round, pattern = "ca5")) %>%
  filter(p_immunity == 1)
data_subset_immune.all <- data_long_main.merged2 %>%
  ungroup() %>%
  filter(str_detect(ca_round, pattern = "ca3") | str_detect(ca_round, pattern = "ca5")) 

m4 <- glm(action_choice ~ punish_risk  + immunity_prob + p_immunity + treatment + group_size , data  = data_subset_immune.all, family = "binomial"(link = "probit"))
m4.participants <- coeftest(m4, vcovCL, type='HC0', cluster=~participant.code)

m5 <- glm(action_choice ~ punish_risk + immunity_prob + treatment + group_size , data  = data_subset_immune.y, family = "binomial"(link = "probit"))
m5.participants <- coeftest(m5, vcovCL, type='HC0', cluster=~participant.code)

m6 <- glm(action_choice ~ punish_risk + immunity_prob + treatment + group_size , data  = data_subset_immune.n, family = "binomial"(link = "probit"))
m6.participants <- coeftest(m6, vcovCL, type='HC0', cluster=~participant.code)

m7 <- glm(action_choice ~ punish_risk  + immunity_prob + p_immunity + treatment + holt_sum + riskscale + group_size, data  = data_subset_immune.all, family = "binomial"(link = "probit"))
m7.participants <- coeftest(m7, vcovCL, type='HC0', cluster=~participant.code)

m8 <- glm(action_choice ~ punish_risk + immunity_prob + treatment + holt_sum + riskscale + group_size , data  = data_subset_immune.y, family = "binomial"(link = "probit"))
m8.participants <- coeftest(m8, vcovCL, type='HC0', cluster=~participant.code)

m9 <- glm(action_choice ~ punish_risk + immunity_prob + treatment + holt_sum + riskscale + group_size, data  = data_subset_immune.n, family = "binomial"(link = "probit"))
m9.participants <- coeftest(m9, vcovCL, type='HC0', cluster=~participant.code)

g3 <- glm(group_contribution ~ punish_risk + treatment + immunity_prob + group_size, data = data_subset_group.immune, family = "binomial"(link = "probit"))
g3.group <- coeftest(g3, vcovCL, type='HC0', cluster=~group_id)

stargazer(m4,m5,m6,m7,m8,m9,g3,
          title = "Lab 10 - 29 Results: Effects of Immunity",
          covariate.labels = c("Punish Risk", "Immunity Prob", "Indiv Immune", "Info Treatment", "Safe Choice", "Kam Risk"), 
          dep.var.labels = c("Indiv Action"), 
          column.labels = c("N = all", "N = Immune",  "N = No Immunity", "N = all", "N = Immune", "N = No Immunity"),
          se = list(m4.participants[,"Std. Error"], m5.participants[,"Std. Error"], m6.participants[,"Std. Error"],
                    m7.participants[,"Std. Error"], m8.participants[,"Std. Error"], m9.participants[,"Std. Error"], 
                    g3.group[,"Std. Error"]),
          notes.append = FALSE,
          single.row = FALSE
)


########### Appendix C: Risk Assessment Figures #################


data_pilot.1$kam_q1 <- rescale(data_pilot.1$kam_survey.1.player.q1)
data_pilot.1$kam_q2 <- 1 - rescale(as.numeric(factor(data_pilot.1$kam_survey.2.player.q2, levels = c("Definitely continue playing", "Probably continue playing", "Not sure", "Probably take my winnings","Definitely take my winnings"))))
data_pilot.1$kam_q3 <- rescale(as.numeric(factor(data_pilot.1$kam_survey.3.player.q3, levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"))))
data_pilot.1$kam_q4 <- rescale(as.numeric(factor(data_pilot.1$kam_survey.4.player.q4, levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"))))
data_pilot.1$kam_q5 <- rescale(as.numeric(factor(data_pilot.1$kam_survey.5.player.q5, levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"))))
data_pilot.1$kam_q6 <- rescale(as.numeric(factor(data_pilot.1$kam_survey.6.player.q6, levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"))))
data_pilot.1$kam_q7 <- rescale(as.numeric(factor(data_pilot.1$kam_survey.7.player.q7, levels = c("Difficult", "Somewhat Difficult", "Somewhat Easy", "Very Easy"))))



data_pilot.2 <- data_pilot.1 %>% 
  mutate(riskscale = (kam_q1 + kam_q2 + kam_q3 + kam_q4 + kam_q5 + kam_q6 + kam_q7)/7) %>%
  mutate_at(c("holt_and_laury.1.player.lottery_1", "holt_and_laury.1.player.lottery_2", "holt_and_laury.1.player.lottery_3", "holt_and_laury.1.player.lottery_4", "holt_and_laury.1.player.lottery_5", "holt_and_laury.1.player.lottery_6", "holt_and_laury.1.player.lottery_7", "holt_and_laury.1.player.lottery_8", "holt_and_laury.1.player.lottery_9", "holt_and_laury.1.player.lottery_10"), ~ifelse(. == "Option A", 1, 0)) %>%
  mutate(holt_sum = holt_and_laury.1.player.lottery_1 + holt_and_laury.1.player.lottery_2 + holt_and_laury.1.player.lottery_3 + holt_and_laury.1.player.lottery_4 + holt_and_laury.1.player.lottery_5 + holt_and_laury.1.player.lottery_6 + holt_and_laury.1.player.lottery_7 + holt_and_laury.1.player.lottery_8 + holt_and_laury.1.player.lottery_9 + holt_and_laury.1.player.lottery_10)

data_appendix.pilot <- data_pilot.2 %>%
  ungroup() %>%
  select(starts_with("holt_and_laury.1.player.lottery_")) %>%
  summarize(across(1:10, mean, na.rm = TRUE))

### Appendix Figure 1 ###
png(file = "holt-pilot.png", width = 600, height = 350)
plot(c(1,2,3,4,5,6,7,8,9,10), data_appendix.pilot[1,], xlim = c(0, 10), ylim = c(0, 1), type = "b",
     xlab = "Lottery Decision", ylab = "Probability of Option A", yaxt = "none")
lines(c(1,2,3,4,5,6,7,8,9,10), c(1,1,1,1,0,0,0,0,0,0), type = "b", lty = 2, lwd = 2, pch = 15)
axis(2, las = 2)
dev.off()


data_appendix.main <- data_main.3 %>%
  ungroup() %>%
  select(starts_with("holt_and_laury.1.player.lottery_")) %>%
  summarize(across(1:10, \(x) mean(x, na.rm = TRUE)))

data_appendix.pilot <- data_pilot.2 %>%
  ungroup() %>%
  select(starts_with("holt_and_laury.1.player.lottery_")) %>%
  summarize(across(1:10, \(x) mean(x, na.rm = TRUE)))


### Appendix Figure 2 ###
png(file = "holt-main.png", width = 600, height = 350)
plot(c(1,2,3,4,5,6,7,8,9,10), data_appendix.main[1,], xlim = c(0, 10), ylim = c(0, 1), type = "b",
     xlab = "Lottery Decision", ylab = "Probability of Option A", yaxt = "none")
lines(c(1,2,3,4,5,6,7,8,9,10), c(1,1,1,1,0,0,0,0,0,0), type = "b", lty = 2, lwd = 2, pch = 15)
axis(2, las = 2)
dev.off()


mean_riskscale_pilot <- mean(data_pilot.2$riskscale, na.rm = TRUE)
sd_riskscale_pilot <- sd(data_pilot.2$riskscale, na.rm = TRUE)
min_riskscale_pilot <- min(data_pilot.2$riskscale, na.rm = TRUE)
max_riskscale_pilot <- max(data_pilot.2$riskscale, na.rm = TRUE)

mean_riskscale_main <- mean(data_main.3$riskscale, na.rm = TRUE)
sd_riskscale_main <- sd(data_main.3$riskscale, na.rm = TRUE)
min_riskscale_main <- min(data_main.3$riskscale, na.rm = TRUE)
max_riskscale_main <- max(data_main.3$riskscale, na.rm = TRUE)


risk_attitude_summary <- data.frame(
  'Risk Acceptance Scale' = c('Pilot', 'Main Study'),
  'Mean' = c(mean_riskscale_pilot, mean_riskscale_main),  # Replace NA with the mean of the main study
  'St Dev' = c(sd_riskscale_pilot, sd_riskscale_main),  # Replace NA with the standard error of the main study
  'Min' = c(min_riskscale_pilot, min_riskscale_main),  # Replace NA with the min of the main study
  'Max' = c(max_riskscale_pilot, max_riskscale_main)  # Replace NA with the max of the main study
)
latex_table <- xtable(risk_attitude_summary, caption = "Risk Attitude Summary Statistics", label = "tab:riskscale",digits = c(0, 0, 3, 3, 3, 3))

## Appendix Table 7 ##
print(latex_table, include.rownames = FALSE, hline.after = c(-1, 0, nrow(risk_attitude_summary)), comment = FALSE,digits = c(0, 3, 4, 4, 4))
